Workflow:
knitr::opts_chunk$set(echo=T, message=F, warning=F)
library(tidyverse)
library(sf)
library(geosphere)
library(mapview)
library(robis) # devtools::install_github('iobis/robis')
library(robis2) # devtools::install_github('iobis/robis2')
library(rmapshaper)
library(geojsonio)
library(scales)
library(DT)
library(leaflet)
library(htmltools)
library(htmltools)
if (basename(getwd()) != 'technical') setwd('technical')
roi = 'cmar'
roi_url = 'https://chm.cbd.int/api/v2013/documents/FB73B0D1-64FB-367F-405A-51AAA4C060F7/attachments/ETTP_8_EBSA.geojson'
#roi = 'clipperton'
#roi_url = 'https://chm.cbd.int/api/v2013/documents/494D9489-5D08-524D-84AE-AD6817304655/attachments/ETTP_2_EBSA.geojson'
roi_geo = sprintf('cache/%s.geojson', roi)
occ_csv = sprintf('cache/%s_occ.csv', roi)
occ_cklist_csv = sprintf('cache/%s_occ_checklist.csv', roi)
occ_txt = sprintf('cache/%s_occ_nwkt.txt', roi)
occ_geo = sprintf('cache/%s_occ.geojson', roi)
eez_rdata = 'cache/eez.rdata'
eez_geo = sprintf('cache/%s_eez.geojson', roi)
wdpa_geo = sprintf('cache/%s_wdpa.geojson', roi)
wdpa_eez_geo = sprintf('cache/%s_wdpa_eez.geojson', roi)
occ_eez_wdpa_csv = sprintf('cache/%s_occ_eez_wdpa.csv', roi)
#library(mapedit)
#editMap()
Fetch EBSA polygon from Ecologically or Biologically Significant Marine Areas (EBSAs) -> Record for Corredor Marino del Pacífico Oriental tropical.
if (!file.exists(roi_geo)){
roi_sf = read_sf(roi_url)
write_sf(roi_sf, roi_geo)
}
roi_sf = read_sf(roi_geo)
mapview(roi_sf)
In order to query OBIS and other web services, well known text (WKT) is needed which when passed through a URL has a limited number of characters to pass. To reduce this size, buffer by 0.1 decimal degrees and simplify the polygon (by 95%).
roi_smp = roi_sf %>%
st_buffer(dist=0.1) %>%
geojson_json() %>%
ms_simplify() %>%
geojson_sp() %>%
st_as_sf()
roi0_wkt = roi_sf %>%
st_geometry() %>%
st_as_text()
roi_wkt = roi_smp %>%
st_geometry() %>%
st_as_text()
mapview(roi_smp)
The WKT representation got reduced from 120,525 to 3,470 characters.
Fetch OBIS records using the newer iobis/robis2 package that retrieves records really fast using ElasticSearch.
cols_drop = c(
'acceptedNameUsage','acceptedNameUsageID','accessRights','associatedMedia','associatedReferences','associatedSequences','associatedTaxa',
'basisOfRecord','behavior','bibliographicCitation',
'collectionID','continent','coordinatePrecision','coordinateUncertaintyInMeters','countryCode','county',
'dataGeneralizations','datasetID','dateIdentified','division','dynamicProperties',
'establishmentMeans','eventID','eventRemarks','eventTime',
'fieldNotes','fieldNumber','footprintSRS','footprintWKT','forma',
'geodeticDatum',
'habitat','higherClassification','higherGeography','higherGeographyID',
'identificationID','identificationQualifier','identificationReferences','identificationRemarks','identifiedBy','individualCount',
'individualID','informationWithheld','infraclass','infrakingdom','infraorder','infraphylum','institutionID','island','islandGroup',
'language','lifeStage','locality','locationAccordingTo','locationID','locationRemarks',
'materialSampleID','maximumDepthInMeters','minimumDepthInMeters','modified','municipality',
'nameAccordingTo','nameAccordingToID','namePublishedIn','namePublishedInID',
'occurrenceID','occurrenceRemarks','occurrenceStatus','originalNameUsage','originalNameUsageID','otherCatalogNumbers','ownerInstitutionCode',
'parvorder',
'section',
'recordNumber','recordedBy','references','reproductiveCondition','rights','rightsHolder',
'scientificNameAuthorship','sex','source',
'stateProvince','subclass','subdivision','subfamily','subforma','subgenus','subkingdom','suborder',
'subphylum','subsection','subspecies','subterclass','subtribe','subvariety',
'superclass','superfamily','superorder','supertribe',
'taxonConceptID','taxonID','taxonRank','taxonRemarks','taxonomicStatus',
'tribe','type','typeStatus',
'variety','vernacularName',
'waterBody',
'phylum','class','order','family','genus','species') # covered by checklist
if (any(!file.exists(occ_txt), !file.exists(occ_csv))){
occ_tbl = robis2::occurrence(geometry=roi_wkt) %>%
as_tibble()
occ_tbl = occ_tbl %>%
select_(.dots = setdiff(names(occ_tbl), cols_drop))
write_lines(nrow(occ_tbl), occ_txt)
write_csv(occ_tbl, occ_csv)
}
occ_tbl = read_csv(occ_csv)
occ_nwkt = read_lines(occ_txt) %>% as.integer()
occ_tbl %>%
head()
Above are the first 6 of 86,052 records from the generalized WKT.
if (!file.exists(occ_cklist_csv)){
occ_cklist = robis::checklist(geometry=roi_wkt) # 1.6 min
# get unique taxonomic name (tname), prioritizing beyond with first non-NA of: valid, worms_id, redlist, status, phylum,...
occ_cklist = occ_cklist %>%
mutate(
invalid = valid_id != id) %>%
arrange(tname,invalid,worms_id,redlist,status,phylum,class,order,family,genus,species) %>%
group_by(tname) %>%
summarize(
worms_id = first(worms_id),
checklist_id = first(id),
valid_id = first(parent_id),
rank_name = first(rank_name),
tauthor = first(tauthor),
phylum = first(phylum),
class = first(class),
order = first(order),
family = first(family),
genus = first(genus),
species = first(species),
redlist = first(redlist),
status = first(status),
wktpoly_sum_records = sum(records),
wktpoly_sum_datasets = sum(datasets))
write_csv(occ_cklist, occ_cklist_csv)
}
occ_cklist = read_csv(occ_cklist_csv)
occ_head = head(occ_tbl)
View(occ_head)
table(occ_tbl$taxonomicgroup, useNA='ifany') # 0 NAs
# occ_tbl$taxonomicgroup YES!
sum(duplicated(occ_cklist$id)) # 0
sum(duplicated(occ_cklist$worms_id)) # 154
sum(duplicated(occ_cklist$tname[duplicated(occ_cklist$tname)])) # 0
occ_cklist_worms_dupes = occ_cklist %>%
filter(
worms_id %in% occ_cklist$worms_id[duplicated(occ_cklist$worms_id)] |
tname %in% occ_cklist$tname[duplicated(occ_cklist$tname)]) %>%
arrange(worms_id)
View(occ_cklist_worms_dupes)
Next, convert the table having decimalLongitude and decimalLatitude into points and filter out points that were in the generalized WKT but not the original ROI polygon.
if (!file.exists(occ_geo)){
occ_tbl = read_csv(occ_csv)
# turn obis into sf
occ_sf = st_sf(
occ_tbl,
geometry = st_sfc(
with(
occ_tbl,
map2(decimalLongitude, decimalLatitude, function(x, y) st_point(c(x, y)))))) %>%
st_set_crs(4326)
# rm obis outside original roi: 137,277 -> 64,342
occ_sf = occ_sf %>%
slice(st_intersects(roi_sf, occ_sf)[[1]])
# write filtered csv
occ_sf %>%
st_set_geometry(NULL) %>%
write_csv(occ_csv)
# write geo with just id to save disk space, for later rejoining with occ_tbl
if (sum(duplicated(occ_tbl$id)) > 0) stop('Whoah! Expecting unique id in OBIS occ_tbl.')
occ_sf %>%
select(id) %>%
write_sf(occ_geo, delete_dsn=T)
}
occ_tbl = read_csv(occ_csv)
occ_sf = read_sf(occ_geo) %>%
left_join(
occ_tbl, by='id')
leaflet(occ_sf) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(
data = roi_sf) %>%
addMarkers(
clusterOptions = markerClusterOptions())
Above are the OBIS occurrence points, filtered to the original ROI polygon (86,052 to 64,118 records). Click on the marker cluster to zoom and explode into finer clusters and eventually individual points.
Get the exclusive economic zones (EEZs) within the ROI region of interest to get at areas of political interest and management.
If we already knew the country EEZs that overlapped with the ROI region, we could use the unevaluated code below that Eduardo Klein Sala shared originally from Pieter Provost using the mregions package.
library(mregions)
eez_names = mr_names('MarineRegions:eez') # View(n)
eez_id = mr_names_search(eez_names, 'Belgian')$id[1]
eez_json = mr_features_get('MarineRegions:eez', eez_id, format='json')
eez_sf = eez_json %>%
as.json() %>%
geojson_sp() %>%
st_as_sf()
mapview(eez_sf)
However am attempting to generalize this analysis so that ROI could be swapped for any region of interest. In that case we need to fetch the entire EEZ dataset (takes ~ 2 min by hitting the MarineRegions.org WFS server at VLIZ.be) and subset based on intersecting EEZs. If the mr_records_by_type() or other function in the mregions package were complete with the bounding box fields (minLatitude, minLongitude, maxLatitude, maxLongitude) we wouldn’t need to do this.
if (!file.exists(eez_rdata)){
eez_url = 'http://geo.vliz.be/geoserver/MarineRegions/ows?service=WFS&version=1.0.0&request=GetFeature&typeName=MarineRegions:eez&maxFeatures=300'
eez_all_sf = read_sf(eez_url) # 2.2 min
saveRDS(eez_all_sf, file=eez_rdata) # store compressed
}
eez_all_sf = readRDS(eez_rdata)
bbox_ply = function(bb){
st_sfc(st_polygon(list(matrix(c(
bb[['xmin']], bb[['ymin']],
bb[['xmin']], bb[['ymax']],
bb[['xmax']], bb[['ymax']],
bb[['xmax']], bb[['ymin']],
bb[['xmin']], bb[['ymin']]), ncol=2, byrow=T)))) %>%
st_set_crs(attr(bb,'crs'))
}
if (!file.exists(eez_geo)){
# filter by eez's within roi
eez_sf = eez_all_sf %>%
slice(st_intersects(st_bbox(roi_sf) %>% bbox_ply(), eez_all_sf)[[1]]) %>%
st_intersection(roi_sf %>% select())
# update areas
eez_sf = eez_sf %>%
rename(
area_eezall_km2 = area_km2) %>%
mutate(
area_eezroi_km2 = st_area(st_geometry(eez_sf)))
write_sf(eez_sf, eez_geo, delete_dsn=T)
}
eez_sf = read_sf(eez_geo)
labels <- with(
eez_sf,
sprintf(
"<strong>%s - %s</strong><br/>area_eezroi_km2: %s<br/>area_eezroi_km2: %s",
sovereign1, territory1, comma(round(area_eezroi_km2,2)), comma(round(area_eezall_km2,2)))) %>% lapply(HTML)
leaflet(roi_sf) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons() %>%
addPolygons(
data = eez_sf,
color = 'red', fillColor = 'red',
label = labels,
highlight = highlightOptions(
color = 'yellow',
weight = 5,
bringToFront = T))
We can use the Protected Planet ESRI Web Service Query to construct an ESRI Query (Operation) on the bounding box of the ROI region. This could be limited to the fields available.
q_url = 'http://ec2-54-204-216-109.compute-1.amazonaws.com:6080/arcgis/rest/services/wdpa/wdpa/MapServer/1/query'
bb = st_bbox(roi_sf)
if (!file.exists(wdpa_geo)){
res = httr::GET(q_url, query = list(
f = 'json',
geometry = sprintf('%0.1f,%0.1f,%0.1f,%0.1f', bb[['xmin']], bb[['ymin']], bb[['xmax']], bb[['ymax']]),
geometryType = 'esriGeometryEnvelope',
inSR = 4326,
outSR = 4326,
outFields = '*'))
wdpa_sf = httr::content(res) %>%
read_sf() # wdpa0_sf = wdpa_sf; mapview(wdpa0_sf)
if (nrow(wdpa_sf) == 0){
warning('no wdpa')
write_sf(wdpa_sf, wdpa_geo, delete_dsn=T)
} else {
# filter by eez's within roi
wdpa_sf = wdpa_sf %>%
slice(st_intersects(roi_sf, wdpa_sf)[[1]])
wdpa_sf = wdpa_sf %>%
mutate(
area_wdpaall_km2 = st_area(st_geometry(wdpa_sf)))
wdpa_sf = wdpa_sf %>%
st_intersection(roi_sf %>% select())
wdpa_sf = wdpa_sf %>%
mutate(
area_wdparoi_km2 = st_area(st_geometry(wdpa_sf)))
write_sf(wdpa_sf, wdpa_geo, delete_dsn=T)
}
}
wdpa_sf = read_sf(wdpa_geo)
if (nrow(wdpa_sf) == 0){
warning('no wdpa')
} else {
labels <- with(
wdpa_sf,
sprintf(
"<strong>%s</strong><br/>DESIG_ENG: %s<br/>DESIG_TYPE: %s<br/>IUCN CAT: %s<br/>GIS_M_AREA: %s",
NAME, DESIG_ENG, DESIG_TYPE, IUCN_CAT, comma(round(GIS_M_AREA,2)))) %>% lapply(HTML)
leaflet(roi_sf) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons() %>%
addPolygons(
data = wdpa_sf,
weight = 1,
color = 'green', fillColor = 'green',
label = labels,
highlight = highlightOptions(
color = 'yellow',
weight = 5,
bringToFront = T))
}
if (!file.exists(wdpa_eez_geo)){
wdpa_eez_sf = wdpa_sf %>%
st_intersection(eez_sf)
write_sf(wdpa_eez_sf, wdpa_eez_geo, delete_dsn=T)
}
wdpa_eez_sf = read_sf(wdpa_eez_geo)
labels = with(
wdpa_eez_sf,
sprintf(
"<strong>%s</strong> in <strong>%s - %s</strong><br/>DESIG_ENG: %s<br/>DESIG_TYPE: %s<br/>IUCN CAT: %s<br/>GIS_M_AREA: %s",
NAME, sovereign1, territory1, DESIG_ENG, DESIG_TYPE, IUCN_CAT, comma(round(GIS_M_AREA,2)))) %>% lapply(HTML)
wdpa_eez_sf$sovereign1 = factor(wdpa_eez_sf$sovereign1)
pal = colorFactor('Spectral', wdpa_eez_sf$sovereign1)
leaflet(roi_sf) %>%
addProviderTiles(providers$Stamen.TonerLite) %>%
addPolygons(
weight = 0.5,
opacity = 0.2) %>%
addPolygons(
data = wdpa_eez_sf,
weight = 1,
fillColor = ~pal(sovereign1), # color = ~pal(sovereign1),
label = labels,
highlight = highlightOptions(
color = 'yellow',
weight = 5,
bringToFront = T))